/*******************************************************************************
																				
	DESCRIPTION:  	This do file carries out the proportional hazards tests in
					Appendix B.3.
	
*******************************************************************************/

clear all
global id_code 136

pause on
set seed 2110

* Get 2006 sample at start of spell and merge with 6M and 2009 predictions:
use Lop* In* year n emplAft6M_0M_In p_emplAft6M_0M_In startU trueEnd dur* if !missing(p_emplAft6M_0M_In) using "${data}/003_MainWithEnsemblePred_Full_2006.dta", clear

* Merge with predictions from models trained on individuals who are 
* unemployed for a different amount of time than those for whom the
* predictions are made
merge 1:1 LopNr_PersonNr InLnr ///
	using "${data}/003_MainWithEnsemblePred_Full_2006_xMonthPred_yMonthModel.dta" ///
	, keepusing(p_emplAft6M_0M_In_6M_Mod) ///
	 assert(2 3) keep(3) nogen
	 
* Merge with predictions made with the 2009 model:
rename p_emplAft6M_0M_In p_emplAft6M_0M_In_06_Mod

merge 1:1 LopNr_PersonNr InLnr ///
	using "${data}/003_MainWithEnsemblePred_Full_2006Individuals_TrainedOn2009modelIndividuals_Full.dta"  ///
	, keepusing(p_emplAft6M_0M_In) ///
	 assert(2 3) keep(3) nogen

rename p_emplAft6M_0M_In p_emplAft6M_0M_In_09_Mod
rename p_emplAft6M_0M_In_06_Mod p_emplAft6M_0M_In
	 
* Generate ratio:
gen jfr_ratio_6M_0M = p_emplAft6M_0M_In_6M_Mod / p_emplAft6M_0M_In
gen l_jfr_ratio_6M_0M = log(jfr_ratio_6M_0M)

gen jfr_ratio_09_06 = p_emplAft6M_0M_In_09_Mod / p_emplAft6M_0M_In
gen l_jfr_ratio_09_06 = log(jfr_ratio_09_06)


* Plot histogram of ratio:
sum jfr_ratio_6M_0M, detail
local mean_0 : di %9.3f r(mean)
local sd_0 : di %9.3f `=`r(sd)''

histogram jfr_ratio_6M_0M if inrange(jfr_ratio_6M_0M, 0.45, 1.55), frequency fcolor(ebblue*0.5) lcolor(ebblue) ///
	addplot(scatteri 0 0.40 14000 1.6, msize(0)) ///
	graphregion(color(white)) /// 	
	plotregion(margin(b=0 y=0)) ///
	legend(on order( ///
		- "Mean = `mean_0'" ///
		- "Std. Dev. = `sd_0'") ///
		symxsize(*0.5) size(small) cols(1) pos(1) ring(0) justification(right) placement(right) )			///
	graphregion(color(white)) /// 
	ytitle("Frequency", size(9pt)) ///
	xtitle("Ratio of Job-Finding Probability 6 Months into Spell / at Start of Spell", axis(1) size(9pt)) ///
	xlabel(0.5(0.5)1.5) ///
	ylabel(0(2500)12400, format(%11.0fc) labsize(10pt) angle(0)) yscale(titlegap(2)) ///
	name(dist_6M_0M, replace)
	
graph export "${output}/${id_code}_PropHazTest_Histogram_6Mvs0M.pdf", replace 	


sum jfr_ratio_09_06, detail
local mean_0 : di %9.3f r(mean)
local sd_0 : di %9.3f `=`r(sd)''
local p99 = r(p99)

histogram jfr_ratio_09_06 if inrange(jfr_ratio_09_06, 0.45, 1.55), frequency fcolor(ebblue*0.5) lcolor(ebblue) ///
	addplot(scatteri 0 0.40 14000 1.6, msize(0)) ///
	graphregion(color(white)) /// 	
	plotregion(margin(b=0 y=0)) ///
	legend(on order( ///
		- "Mean = `mean_0'" ///
		- "Std. Dev. = `sd_0'") ///
		symxsize(*0.5) size(small) cols(1) pos(1) ring(0) justification(right) placement(right) )			///
	graphregion(color(white)) /// 
	ytitle("Frequency", size(9pt)) ///
	xtitle("Ratio of Job-Finding Probability at Start of Spell in 2009 / in 2006", axis(1) size(9pt)) ///
	xlabel(0.5(0.5)1.5) ///
	ylabel(0(2500)12500, format(%11.0fc) labsize(10pt) angle(0)) yscale(titlegap(2)) ///
	name(dist_09_06, replace)
	
graph export "${output}/${id_code}_PropHazTest_Histogram_2009vs2006.pdf", replace 

* Merge dataset with with characteristics
global controls $demoEdu $incIndiv $incOther $emplHist $incHist ///
	$migHist $indu $mun

merge 1:1 LopNr_PersonNr InLnr ///
	using "${data}/002_DataForR_Full_2006", ///
	keepusing(LopNr_PersonNr InLnr $controls ) ///
	assert(2 3) keep(3) nogen

* Generate tertiary education dummy:
gen educ_tert = EducLevel_dummy5 + EducLevel_dummy6
gen educ_sec = EducLevel_dummy3 + EducLevel_dummy4
gen educ_prim = EducLevel_dummy1 + EducLevel_dummy2

label variable age "Age"
label variable Gender "Female"
label variable L_N_Kids "Kids"
label variable L_N_Kids_U18 "Kids <18"
label variable foreign "Foreign"
label variable educ_tert "Higher Edu."
label variable educ_sec "Secondary Edu."
label variable educ_prim "Primary Edu."
label variable L_WageInc_adj "Labour Income"
label variable L_FamInc_adj "Family Income"
label variable L_OtherInc_adj "Other Income"
label variable DaysUnemp_2Years "Days on UI (2y)"
label variable DaysOnDI_2Years "Days on DI (2y)"
label variable unemplSpells2Ybefore  "Unemp. Spells (2y)"
label variable tenure "Tenure"

* Store control variables in macros:
global controls_sel age Gender L_N_Kids L_N_Kids_U18 foreign educ_tert educ_sec ///
	L_WageInc_adj ///
	L_OtherInc_adj L_FamInc_adj ///
	DaysUnemp_2Years unemplSpells2Ybefore DaysOnDI_2Years tenure


**** 3.3. Regressing the outcomes on a subset of the covariates

* First, standardize the variables:
egen std_jfr_ratio_6M_0M = std(jfr_ratio_6M_0M)
egen std_l_jfr_ratio_6M_0M = std(l_jfr_ratio_6M_0M)

egen std_jfr_ratio_09_06 = std(jfr_ratio_09_06)
egen std_l_jfr_ratio_09_06 = std(l_jfr_ratio_09_06)

egen std_p_emplAft6M_0M_In = std(p_emplAft6M_0M_In)
label variable std_p_emplAft6M_0M_In "Pred. JFR"

global std_controls_sel 
foreach v of varlist $controls_sel {
	qui sum `v'
	if `=r(sd)' != 0 { 
		cap egen std_`v' = std(`v')
		global std_controls_sel $std_controls_sel std_`v'
	}
	else if `=r(sd)' == 0 {
		global std_controls_sel $std_controls_sel `v'
	}
}

* Now regress on other variables and store the coefficient estimates:
reg std_l_jfr_ratio_6M_0M std_p_emplAft6M_0M_In $std_controls_sel
estimates store est_6M_0M

reg std_l_jfr_ratio_09_06 std_p_emplAft6M_0M_In $std_controls_sel
estimates store est_09_06

* Fix variable labels for the plots:
label variable std_age "Age"
label variable std_Gender "Female"
label variable std_L_N_Kids "Kids"
label variable std_L_N_Kids_U18 "Kids <18"
label variable std_foreign "Foreign"
label variable std_L_WageInc_adj "Labour Income"
label variable std_DaysUnemp_2Years "Days on UI (2y)"
label variable std_DaysOnDI_2Years "Days on DI (2y)"
label variable std_unemplSpells2Ybefore  "Unemp. Spells (2y)"
label variable std_educ_tert "Higher Edu."
label variable std_educ_sec "Secondary Edu."
label variable std_L_FamInc_adj "Household Income"
label variable std_L_OtherInc_adj "Other Income"
label variable std_tenure "Tenure"

	
* Plot coefficients using coefplot:
coefplot ///
	(est_6M_0M, label("Duration Dep. (F{sub:6} / F{sub:0})") color(orange_red) ciopts(color(orange_red))) /// 
	(est_09_06, label("Cyclicality (F{sup:2009} / F{sup:2006})") color(ebblue) ciopts(color(ebblue))) /// 
	, ///
	norecycle recast(bar) barwidth(0.4) cirecast(rcap) citop drop(_cons) ///
	orderby(2) ///
	coeflabels(, labsize(small)) ///
	groups(std_age std_educ_sec = `""{bf:Socio-}" "{bf:demographics}""' ///
		std_L_WageInc_adj std_L_FamInc_adj = `""{bf:Income}" "{bf:Variables}""' ///
		std_DaysUnemp_2Years std_tenure = `""{bf:Employment}" "{bf:History}""' ) ///
	fcolor(*0.5) xline(0) msize(2) ///
	/* xlabel(-.5(.25).5) */ ///
	xtitle("Regression Coefficient") ///
	legend(rows(1) size(small) symxsize(*0.6)) ///
	byopts(graphregion(color(white))) ///
	graphregion(color(white)) ///
	name(coef, replace)
	
graph export "${output}/${id_code}_PropHazTest_Regression.pdf", replace 